package com.leadingsoft.controller.parse;

import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.beetl.sql.core.SQLManager;
import org.cef.browser.CefBrowser;
import org.cef.callback.CefStringVisitor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.leadingsoft.common.Constant;
import com.leadingsoft.common.TaskQuene;
import com.leadingsoft.common.model.Hotel;
import com.leadingsoft.common.model.HotelComment;
import com.leadingsoft.common.model.HotelCommentLog;
import com.leadingsoft.common.model.HotelLog;
import com.leadingsoft.controller.parse.impl.GetCtripHotelComment;

/**
 * @ClassName ParsingCtripHotelComment
 * @Description HTML parsing example for Ctrip hotel comment pages.<br>
 *              {@link #visit(String)} receives the page source, selects the
 *              comment elements by CSS class name, stores them through the
 *              {@link SQLManager} and then schedules the next comment page.<br>
 *              The browser URL is expected to look like
 *              {@code <Constant.CTRIPHOTELCOMMENTURLTEMP><hotelId>_p<page>t0.html}.
 * @author gongym
 * @date 2018-06-09 22:32:26
 */
public class ParsingCtripHotelComment implements CefStringVisitor {
	private static final Logger logger = LoggerFactory.getLogger(ParsingCtripHotelComment.class);
	private final CefBrowser browser;
	/** CSS class name (passed to {@code getElementsByClass}) of the comment elements. */
	private final String listSelector;
	private final SQLManager sqlManager;
	private final List<Hotel> hotelList;
	private final List<String> hotelIdList;

	/**
	 * @param browser      browser whose current URL identifies the page being parsed
	 * @param listSelector CSS class name of the elements that hold the comments
	 * @param sqlManager   BeetlSQL manager used for the crawl log and comment inserts
	 * @param hotelList    forwarded to {@code TaskQuene.getCtripHotelCommentListUrl}
	 * @param hotelIdList  forwarded to {@code TaskQuene.getCtripHotelCommentListUrl}
	 */
	public ParsingCtripHotelComment(CefBrowser browser, String listSelector, SQLManager sqlManager,
			List<Hotel> hotelList, List<String> hotelIdList) {
		this.browser = browser;
		this.listSelector = listSelector;
		this.sqlManager = sqlManager;
		this.hotelList = hotelList;
		this.hotelIdList = hotelIdList;
	}

	/**
	 * Handles the source of the page currently loaded in the browser: persists
	 * its comments unless the URL is already logged as crawled, then either
	 * queues the next comment URL or closes the browser when none is left.
	 *
	 * @param string HTML source of the current page
	 * @throws ArrayIndexOutOfBoundsException if an uncrawled URL contains no {@code _}
	 * @throws NumberFormatException          if the page part of an uncrawled URL is not numeric
	 */
	@Override
	public void visit(String string) {
		String url = browser.getURL();
		logger.debug("开始解析当前页URL：{}", url);
		// Check whether the current URL has been parsed already. The lookup must
		// use HotelCommentLog, the same entity that is written after a
		// successful crawl; querying HotelLog would never see those records.
		HotelCommentLog logTemplate = new HotelCommentLog();
		logTemplate.setSpideredUrl(url);
		if (null == sqlManager.templateOne(logTemplate)) {
			saveComments(url, string);
		}
		// Get the URL of the next hotel comment page.
		String nextUrl = TaskQuene.getCtripHotelCommentListUrl(url, hotelList, hotelIdList);
		if (StringUtils.isNotEmpty(nextUrl)) {
			TaskQuene.ctripCommentTaskUrl.add(nextUrl);
		} else {
			// No further page: the current task is finished, close the browser.
			browser.close();
		}
	}

	/** Extracts the comments from the page, stores them and logs the URL as crawled. */
	private void saveComments(String url, String html) {
		// URL layout: <prefix><hotelId>_p<page>t0.html
		String[] urlParts = url.split("_");
		String hotelId = StringUtils.remove(urlParts[0], Constant.CTRIPHOTELCOMMENTURLTEMP);
		Integer pageIndex = Integer.parseInt(urlParts[1].replace("t0.html", "").replace("p", ""));

		Document htmlDocument = Jsoup.parse(html);
		Elements commentElements = htmlDocument.getElementsByClass(listSelector);
		List<Object> hotelCommentList = new GetCtripHotelComment().elementsToObjects(commentElements);
		// Stamp every parsed comment with the hotel and page it came from.
		hotelCommentList.stream().filter(object -> object instanceof HotelComment).forEach(object -> {
			HotelComment hotelComment = (HotelComment) object;
			hotelComment.setHotelId(hotelId);
			hotelComment.setCommentPage(pageIndex);
		});
		// Nothing to write for a page without comments; the URL is still logged below.
		if (!hotelCommentList.isEmpty()) {
			sqlManager.insertBatch(HotelComment.class, hotelCommentList);
		}
		HotelCommentLog crawledLog = new HotelCommentLog();
		crawledLog.setSpideredUrl(url);
		sqlManager.insert(crawledLog);
	}
}
